###
libs<-c('scater','loomR','Seurat','patchwork','SeuratDisk','monocle3','ggpubr','cowplot','ggplot2','ggbeeswarm',
'monocle3','limma','edgeR','fitdistrplus','factoextra','ggrepel','tidyverse','pheatmap','reshape2','ComplexHeatmap',
"survminer","survival","ggalluvial",'gtsummary','variancePartition','DirichletReg','cowsay','emmeans')
lapply(libs, require, character.only = TRUE) ; rm(libs)
###
setwd("/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/")
file_717_per_cell_md <- readRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/per_cell_md.rds')
#test <- readRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/717_per_cell_md.rds') # same file
#identical(rownames(test),rownames(file_717_per_cell_md)) # check
### Mark doublets in a binary variable
file_717_per_cell_md$doublet_pred_edit <- file_717_per_cell_md$doublet_pred
file_717_per_cell_md$doublet_pred_edit[file_717_per_cell_md$doublet_pred_edit %in% c('Plasma_B','poss_singlet_cluster','singlet')] <- 'singlet'
### cells x sample
x <- melt(table(file_717_per_cell_md$sample_id))
ggarrange(
ggplot(data=x)+aes(x='Samples',y=log10(value))+theme_classic()+geom_boxplot(),
ggplot(data=x)+aes(y=log10(value))+theme_classic()+geom_density(fill='grey'),ncol=2,align='hv'
)
### Prepare data for composition
x <- file_717_per_cell_md[,which(colnames(file_717_per_cell_md) %in% c('sample_id','visit_type','siteXbatch'))]
rownames(x) <- NULL ; x <- unique(x)
x$site <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$a
x$batch <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$b
rownames(x) <- x$sample_id
x$sample_id <- NULL ; x$siteXbatch <- NULL
y <- melt(table(file_717_per_cell_md$sample_id))
y <- as.data.frame(y)
y <- y[order(y$value,decreasing = T),]
rownames(y) <- y$Var1
y$Var1 <- NULL
identical(rownames(y),rownames(x))
## [1] FALSE
x<-x[match(rownames(y),rownames(x)),]
identical(rownames(y),rownames(x))
## [1] TRUE
row_ha = rowAnnotation(foo2 = runif(10), bar2 = anno_barplot(runif(10)))
row_ha = rowAnnotation(df=x,
col = list(visit_type = c("baseline_diagnosis" = "pink","other" = "lightgreen",
"relapse_progression" = "orange",
"remission_response" = "deepskyblue1","post_transplant" = "blue"),
site = c("EMORY" = "darkred","MAYO" = "darkblue","MSSM" = "darkgreen","WUSTL" = "darkorange3"),
batch = c("B1" = "grey25","B2" = "grey50","B3" = "grey75","B4" = "grey90")))
Heatmap(as.matrix(log10(y+1)),
col = colorRampPalette(c("steelblue","white","firebrick"))(255),
show_row_names = FALSE , show_column_names = FALSE,
column_names_rot = 45,
row_split = x$site ,right_annotation = row_ha,
border = TRUE, use_raster = FALSE,
name = "Log10(Cells)")
y <- table(file_717_per_cell_md$subcluster_V03072023,file_717_per_cell_md$sample_id)
class(y) <- 'matrix'
identical(colnames(y),rownames(x))
## [1] FALSE
x<-x[match(colnames(y),rownames(x)),]
identical(colnames(y),rownames(x))
## [1] TRUE
col_ha = rowAnnotation(df=x, col = list(visit_type = c("baseline_diagnosis" = "pink",
"other" = "lightgreen",
"relapse_progression" = "orange",
"remission_response" = "deepskyblue1",
"post_transplant" = "blue"),
site = c("EMORY" = "darkred",
"MAYO" = "darkblue",
"MSSM" = "darkgreen",
"WUSTL" = "darkorange3"),
batch = c("B1" = "grey25",
"B2" = "grey50",
"B3" = "grey75",
"B4" = "grey90")))
z <- data.frame(Compartment=rownames(y),Cluster=rownames(y))
z$Compartment[grep('Plasma',z$Compartment)]<- 'Plasma'
z$Compartment[grep('NkT',z$Compartment)]<- 'NkT'
z$Compartment[grep('Myeloid',z$Compartment)]<- 'Myeloid'
z$Compartment[grep('^BEry',z$Compartment)]<- 'BEry'
z$Compartment[grep('^Ery',z$Compartment)]<- 'Ery'
z$Compartment[grep('Full',z$Compartment)]<- 'Fibroblasts'
rownames(z) <- z$Cluster
z$Cluster <- NULL
Heatmap(t(as.matrix(log10(y+1))),
col = colorRampPalette(c("steelblue","white","firebrick"))(255),
show_row_names = FALSE , show_column_names = FALSE, column_names_rot = 45,
row_split = x$visit_type, column_split = z$Compartment,
right_annotation = col_ha,
border = TRUE, use_raster = FALSE,
name = "Log10(Cells)")
y <- table(file_717_per_cell_md$subcluster_V03072023,file_717_per_cell_md$sample_id)
class(y) <- 'matrix'
for(i in 1:ncol(y)){ y[,i] <- y[,i]/sum(y[,i]) }
x <- file_717_per_cell_md[,which(colnames(file_717_per_cell_md) %in% c('davies_based_risk','sample_id','visit_type','siteXbatch','progression_group'))]
rownames(x) <- NULL
x <- unique(x)
x$site <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$a
x$batch <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$b
rownames(x) <- x$sample_id
x$sample_id <- NULL ; x$siteXbatch <- NULL
identical(colnames(y),rownames(x))
## [1] FALSE
x<-x[match(colnames(y),rownames(x)),]
identical(colnames(y),rownames(x))
## [1] TRUE
### packages and multicore settings
libs<-c("variancePartition","doParallel","BiocParallel")
lapply(libs, require, character.only = TRUE)
## [[1]]
## [1] TRUE
##
## [[2]]
## [1] TRUE
##
## [[3]]
## [1] TRUE
registerDoParallel(makeCluster(10))
###
my_doublets <- unique(file_717_per_cell_md$subcluster_V03072023[file_717_per_cell_md$doublet_pred_edit %in% c('dblet_cluster','poss_dblet_cluster')])
### 4 variables
form <- ~ (1|davies_based_risk) + (1|visit_type) + (1|site) + (1|batch)
### run the model
variance_exprs_matrix <- fitExtractVarPartModel(y, form, x)
pdf(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/review_diag_variance_confounders_risk.pdf',width = 4,height = 2.5)
plotVarPart( sortCols( variance_exprs_matrix ) ) +theme_classic()+ ggtitle( 'Variance') + theme(axis.text.x = element_text(angle = 60,hjust = 1))
dev.off()
## quartz_off_screen
## 2
plotVarPart( sortCols( variance_exprs_matrix ) ) +theme_classic()+ ggtitle( 'Variance') + theme(axis.text.x = element_text(angle = 60,hjust = 1))
### progression
form <- ~ (1|progression_group) + (1|visit_type) + (1|site) + (1|batch)
### run the model
variance_exprs_matrix <- fitExtractVarPartModel(y, form, x)
my_doublets <- as.character(unique(file_717_per_cell_md$subcluster_V03072023[which(!file_717_per_cell_md$doublet_pred_edit %in% 'singlet')]))
### 4 variables
form <- ~ (1|davies_based_risk) + (1|visit_type) + (1|site) + (1|batch)
### run the model
variance_exprs_matrix <- fitExtractVarPartModel(y[!rownames(y) %in% my_doublets,], form, x)
pdf(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/review_diag_variance_confounders_risk_noDB.pdf',width = 4,height = 2.5)
plotVarPart( sortCols( variance_exprs_matrix ) ) +theme_classic()+ ggtitle( 'Variance') + theme(axis.text.x = element_text(angle = 60,hjust = 1))
dev.off()
## quartz_off_screen
## 2
plotVarPart( sortCols( variance_exprs_matrix ) ) +theme_classic()+ ggtitle( 'Variance') + theme(axis.text.x = element_text(angle = 60,hjust = 1))
### progression
form <- ~ (1|progression_group) + (1|visit_type) + (1|site) + (1|batch)
### run the model
variance_exprs_matrix <- fitExtractVarPartModel(y[!rownames(y) %in% my_doublets,], form, x)
plotVarPart( sortCols( variance_exprs_matrix ) ) +theme_classic()+ ggtitle( 'Variance') + theme(axis.text.x = element_text(angle = 60,hjust = 1))
y <- table(file_717_per_cell_md$subcluster_V03072023,file_717_per_cell_md$sample_id)
class(y) <- 'matrix'
#y <- log2(y+1)
for(i in 1:ncol(y)){ y[,i] <- y[,i]/sum(y[,i]) }
x <- file_717_per_cell_md[,which(colnames(file_717_per_cell_md) %in% c('davies_based_risk','sample_id','visit_type','siteXbatch','progression_group'))]
rownames(x) <- NULL
x <- unique(x)
x$site <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$a
x$batch <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$b
rownames(x) <- x$sample_id
x$sample_id <- NULL ; x$siteXbatch <- NULL
identical(colnames(y),rownames(x))
## [1] FALSE
x<-x[match(colnames(y),rownames(x)),]
identical(colnames(y),rownames(x))
## [1] TRUE
### subset
ix <- which(x$davies_based_risk %in% c('high_risk','standard_risk') & x$visit_type %in% c('baseline_diagnosis'))
x <- x[ix,]
y <- y[,ix]
### Design
form <- ~ 0 + progression_group + (1|site) + (1|batch)
### no weights because data is already normalized
L = makeContrastsDream( form, x,
contrasts = c(#'progression_groupInc - progression_groupNP','progression_groupP - progression_groupNP',
'progression_groupRP - progression_groupNP'))
### fit with contrasts
fitmm = dream( y, form, x, L)
### Store all outcome statistics
lmfreq_results <- list() ; for ( i in 1 ) { lmfreq_results[[i]] <- topTable(fitmm, coef=i,n=Inf,adjust.method="fdr")
lmfreq_results[[i]]$Comparison <- colnames(fitmm$coefficients)[i]
lmfreq_results[[i]]$Marker <- rownames( topTable(fitmm, coef=i,n=Inf,adjust.method="fdr") ) }
progression_res <- do.call(rbind,lmfreq_results)
rownames(progression_res) <- NULL
progression_res$nLogFDR <- -log10(progression_res$adj.P.Val)
progression_res$Comparison <- gsub('progression_group','',progression_res$Comparison)
### Risk
y <- table(file_717_per_cell_md$subcluster_V03072023,file_717_per_cell_md$sample_id)
class(y) <- 'matrix'
#y <- log2(y+1)
for(i in 1:ncol(y)){ y[,i] <- y[,i]/sum(y[,i]) }
x <- file_717_per_cell_md[,which(colnames(file_717_per_cell_md) %in% c('davies_based_risk','sample_id','visit_type','siteXbatch','progression_group'))]
rownames(x) <- NULL
x <- unique(x)
x$site <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$a
x$batch <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$b
rownames(x) <- x$sample_id
x$sample_id <- NULL ; x$siteXbatch <- NULL
identical(colnames(y),rownames(x))
## [1] FALSE
x<-x[match(colnames(y),rownames(x)),]
identical(colnames(y),rownames(x))
## [1] TRUE
### subset
ix <- which(x$davies_based_risk %in% c('high_risk','standard_risk') & x$visit_type %in% c('baseline_diagnosis'))
x <- x[ix,]
y <- y[,ix]
### Design
form <- ~ 0 + davies_based_risk + (1|site) + (1|batch)
### no weights because data is already normalized
L = makeContrastsDream( form,
x,
contrasts = c('davies_based_riskhigh_risk - davies_based_riskstandard_risk'))
### fit with contrasts
fitmm = dream( y, form, x, L)
### Store all outcome statistics
lmfreq_results <- list() ; for ( i in 1 ) { lmfreq_results[[i]] <- topTable(fitmm, coef=i,n=Inf,adjust.method="fdr")
lmfreq_results[[i]]$Comparison <- colnames(fitmm$coefficients)[i]
lmfreq_results[[i]]$Marker <- rownames( topTable(fitmm, coef=i,n=Inf,adjust.method="fdr") ) }
risk_res <- do.call(rbind,lmfreq_results)
rownames(risk_res) <- NULL
risk_res$nLogFDR <- -log10(risk_res$adj.P.Val)
risk_res$Comparison <- gsub('davies_based_risk','',risk_res$Comparison)
y <- table(file_717_per_cell_md$subcluster_V03072023,file_717_per_cell_md$sample_id)
class(y) <- 'matrix'
x <- file_717_per_cell_md[,which(colnames(file_717_per_cell_md) %in% c('davies_based_risk','sample_id','visit_type','siteXbatch','progression_group'))]
rownames(x) <- NULL
x <- unique(x)
x$site <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$a
x$batch <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$b
rownames(x) <- x$sample_id
x$sample_id <- NULL ; x$siteXbatch <- NULL
identical(colnames(y),rownames(x))
## [1] FALSE
x<-x[match(colnames(y),rownames(x)),]
identical(colnames(y),rownames(x))
## [1] TRUE
### subset
ix <- which(x$davies_based_risk %in% c('high_risk','standard_risk') & x$visit_type %in% c('baseline_diagnosis'))
x <- x[ix,]
y <- y[,ix]
y <- y[!rownames(y) %in% my_doublets,]
#y <- log2(y+1)
for(i in 1:ncol(y)){ y[,i] <- y[,i]/sum(y[,i]) }
### Design
form <- ~ 0 + progression_group + (1|site) + (1|batch)
### no weights because data is already normalized
L = makeContrastsDream( form, x,
contrasts = c(#'progression_groupInc - progression_groupNP','progression_groupP - progression_groupNP',
'progression_groupRP - progression_groupNP'))
### fit with contrasts
fitmm = dream( y, form, x, L)
### Store all outcome statistics
lmfreq_results <- list() ; for ( i in 1:1 ) { lmfreq_results[[i]] <- topTable(fitmm, coef=i,n=Inf,adjust.method="fdr")
lmfreq_results[[i]]$Comparison <- colnames(fitmm$coefficients)[i]
lmfreq_results[[i]]$Marker <- rownames( topTable(fitmm, coef=i,n=Inf,adjust.method="fdr") ) }
progression_res_ND <- do.call(rbind,lmfreq_results)
rownames(progression_res_ND) <- NULL
progression_res_ND$nLogFDR <- -log10(progression_res_ND$adj.P.Val)
progression_res_ND$Comparison <- gsub('progression_group','',progression_res_ND$Comparison)
### Risk
y <- table(file_717_per_cell_md$subcluster_V03072023,file_717_per_cell_md$sample_id)
class(y) <- 'matrix'
x <- file_717_per_cell_md[,which(colnames(file_717_per_cell_md) %in% c('davies_based_risk','sample_id','visit_type','siteXbatch','progression_group'))]
rownames(x) <- NULL
x <- unique(x)
x$site <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$a
x$batch <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$b
rownames(x) <- x$sample_id
x$sample_id <- NULL ; x$siteXbatch <- NULL
identical(colnames(y),rownames(x))
## [1] FALSE
x<-x[match(colnames(y),rownames(x)),]
identical(colnames(y),rownames(x))
## [1] TRUE
### subset
ix <- which(x$davies_based_risk %in% c('high_risk','standard_risk') & x$visit_type %in% c('baseline_diagnosis'))
x <- x[ix,]
y <- y[,ix]
y <- y[!rownames(y) %in% my_doublets,]
#y <- log2(y+1)
for(i in 1:ncol(y)){ y[,i] <- y[,i]/sum(y[,i]) }
### Design
form <- ~ 0 + davies_based_risk + (1|site) + (1|batch)
### no weights because data is already normalized
L = makeContrastsDream( form,
x,
contrasts = c('davies_based_riskhigh_risk - davies_based_riskstandard_risk'))
### fit with contrasts
fitmm = dream( y, form, x, L)
### Store all outcome statistics
lmfreq_results <- list() ; for ( i in 1 ) { lmfreq_results[[i]] <- topTable(fitmm, coef=i,n=Inf,adjust.method="fdr")
lmfreq_results[[i]]$Comparison <- colnames(fitmm$coefficients)[i]
lmfreq_results[[i]]$Marker <- rownames( topTable(fitmm, coef=i,n=Inf,adjust.method="fdr") ) }
risk_res_ND <- do.call(rbind,lmfreq_results)
rownames(risk_res_ND) <- NULL
risk_res_ND$nLogFDR <- -log10(risk_res_ND$adj.P.Val)
risk_res_ND$Comparison <- gsub('davies_based_risk','',risk_res_ND$Comparison)
risk_res_tmp <- risk_res[which(risk_res$Marker %in% risk_res_ND$Marker),]
identical(risk_res_tmp$Marker,risk_res_ND$Marker)
## [1] FALSE
a <-risk_res_tmp ; b <- risk_res_ND
cor.test(a$AveExpr,b$AveExpr,method='pearson')
##
## Pearson's product-moment correlation
##
## data: a$AveExpr and b$AveExpr
## t = 1.5683, df = 87, p-value = 0.1204
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.0439650 0.3615809
## sample estimates:
## cor
## 0.1658099
cor.test(a$logFC,b$logFC,method='pearson')
##
## Pearson's product-moment correlation
##
## data: a$logFC and b$logFC
## t = 3.9712, df = 87, p-value = 0.0001469
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1997734 0.5547335
## sample estimates:
## cor
## 0.3917325
cor.test(a$adj.P.Val,b$adj.P.Val,method='pearson')
##
## Pearson's product-moment correlation
##
## data: a$adj.P.Val and b$adj.P.Val
## t = 34.406, df = 87, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9473180 0.9770337
## sample estimates:
## cor
## 0.9651623
cor.test(a$P.Value,b$P.Value,method='pearson')
##
## Pearson's product-moment correlation
##
## data: a$P.Value and b$P.Value
## t = 124.84, df = 87, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9957610 0.9981776
## sample estimates:
## cor
## 0.9972203
x <- data.frame(c=b$Marker,logfc_nd=b$logFC,adpval_nd=b$adj.P.Val,m='ND')
y <- data.frame(c=a$Marker,logfc=a$logFC,adpval=a$adj.P.Val,m='all')
z<-merge(x,y,by='c')
ggplot(data=z)+aes(x=logfc_nd,y=logfc) + theme_classic()+geom_point()
ggplot(data=z)+aes(x=adpval_nd,y=adpval) + theme_classic()+geom_point()
progression_res_tmp <- progression_res[which(progression_res$Marker %in% progression_res_ND$Marker),]
a <- progression_res_tmp[progression_res_tmp$Comparison %in% 'RP - NP',]
b <- progression_res_ND[progression_res_ND$Comparison %in% 'RP - NP',]
identical(a$Marker,b$Marker)
## [1] FALSE
cor.test(a$AveExpr,b$AveExpr,method='pearson')
##
## Pearson's product-moment correlation
##
## data: a$AveExpr and b$AveExpr
## t = 1.8403, df = 87, p-value = 0.06913
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.01530472 0.38625499
## sample estimates:
## cor
## 0.193569
cor.test(a$logFC,b$logFC,method='pearson')
##
## Pearson's product-moment correlation
##
## data: a$logFC and b$logFC
## t = -2.062, df = 87, p-value = 0.04219
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.405871558 -0.007961722
## sample estimates:
## cor
## -0.2158605
cor.test(a$adj.P.Val,b$adj.P.Val,method='pearson')
##
## Pearson's product-moment correlation
##
## data: a$adj.P.Val and b$adj.P.Val
## t = 73.841, df = 87, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9879936 0.9948269
## sample estimates:
## cor
## 0.9921162
cor.test(a$P.Value,b$P.Value,method='pearson')
##
## Pearson's product-moment correlation
##
## data: a$P.Value and b$P.Value
## t = 197.06, df = 87, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9982940 0.9992671
## sample estimates:
## cor
## 0.9988817
### Risk
y <- table(file_717_per_cell_md$subcluster_V03072023,file_717_per_cell_md$sample_id)
class(y) <- 'matrix'
x <- file_717_per_cell_md[,which(colnames(file_717_per_cell_md) %in% c('davies_based_risk','sample_id','visit_type','siteXbatch','progression_group'))]
rownames(x) <- NULL
x <- unique(x)
x$site <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$a
x$batch <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$b
rownames(x) <- x$sample_id
x$sample_id <- NULL ; x$siteXbatch <- NULL
x<-x[match(colnames(y),rownames(x)),]
### Subset
ix <- which(x$davies_based_risk %in% c('high_risk','standard_risk') & x$visit_type %in% c('baseline_diagnosis'))
x <- x[ix,]
y <- y[,ix]
y <- y[!rownames(y) %in% my_doublets,]
Compartment <- c('NkT','BEry','Ery','Myeloid','Plasma') #,'Full'
res <- list()
for(iii in Compartment){
ix <- grep(paste('^',iii,sep=''), rownames(y))
z <- y[ix,]
z[z==0] <- 0.0001
for(i in 1:ncol(z)){ z[,i] <- z[,i]/sum(z[,i]) }
### Design
form <- ~ 0 + davies_based_risk + (1|site) + (1|batch)
### no weights because data is already normalized
L = makeContrastsDream( form, x,
contrasts = c('davies_based_riskstandard_risk - davies_based_riskhigh_risk'))
### fit with contrasts
fitmm = dream( z, form, x, L)
###
a <- topTable(fitmm, coef=1,n=Inf,adjust.method="fdr")
a$Comparison <- colnames(fitmm$coefficients)[1]
a$Marker <- rownames( topTable(fitmm, coef=1,n=Inf,adjust.method="fdr") )
a$nLogFDR <- -log10(a$adj.P.Val)
a$Comparison <- gsub('davies_based_risk','',a$Comparison)
res[[iii]] <- a
}
res_compartment_nd <- do.call(rbind,res)
rownames(res_compartment_nd) <- res_compartment_nd$Marker
risk_res_tmp <- risk_res_ND[which(risk_res_ND$Marker %in% res_compartment_nd$Marker),]
risk_res_tmp <- risk_res_tmp[match(res_compartment_nd$Marker,risk_res_tmp$Marker),]
identical(res_compartment_nd$Marker,risk_res_tmp$Marker)
## [1] TRUE
a <-risk_res_tmp ; b <- res_compartment_nd
a <-risk_res_tmp ; b <- res_compartment_nd
cor.test(a$AveExpr,b$AveExpr,method='pearson')
##
## Pearson's product-moment correlation
##
## data: a$AveExpr and b$AveExpr
## t = 5.2701, df = 86, p-value = 9.978e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3174936 0.6375489
## sample estimates:
## cor
## 0.4940811
cor.test(a$logFC,b$logFC,method='pearson')
##
## Pearson's product-moment correlation
##
## data: a$logFC and b$logFC
## t = -4.5239, df = 86, p-value = 1.933e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.5933895 -0.2521483
## sample estimates:
## cor
## -0.4384365
cor.test(a$adj.P.Val,b$adj.P.Val,method='pearson')
##
## Pearson's product-moment correlation
##
## data: a$adj.P.Val and b$adj.P.Val
## t = 3.0381, df = 86, p-value = 0.003153
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1089856 0.4888865
## sample estimates:
## cor
## 0.3113216
cor.test(a$P.Value,b$P.Value,method='pearson')
##
## Pearson's product-moment correlation
##
## data: a$P.Value and b$P.Value
## t = 4.6044, df = 86, p-value = 1.42e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2594278 0.5984127
## sample estimates:
## cor
## 0.4447069
x <- data.frame(c=b$Marker,logfc_nd=b$logFC,adpval_nd=b$adj.P.Val,m='ND')
y <- data.frame(c=a$Marker,logfc=a$logFC,adpval=a$adj.P.Val,m='all')
z<-merge(x,y,by='c')
ggplot(data=z)+aes(x=logfc_nd,y=logfc) + theme_classic()+geom_point()
ggplot(data=z)+aes(x=adpval_nd,y=adpval) + theme_classic()+geom_point()
y <- table(file_717_per_cell_md$subcluster_V03072023,file_717_per_cell_md$sample_id)
class(y) <- 'matrix'
#y <- log2(y+1)
x <- file_717_per_cell_md[,which(colnames(file_717_per_cell_md) %in% c('davies_based_risk','sample_id','visit_type','siteXbatch','progression_group'))]
rownames(x) <- NULL
x <- unique(x)
x$site <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$a
x$batch <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$b
rownames(x) <- x$sample_id
x$sample_id <- NULL ; x$siteXbatch <- NULL
identical(colnames(y),rownames(x))
## [1] FALSE
x<-x[match(colnames(y),rownames(x)),]
identical(colnames(y),rownames(x))
## [1] TRUE
### subset
ix <- which(x$davies_based_risk %in% c('high_risk','standard_risk') & x$visit_type %in% c('baseline_diagnosis'))
x <- x[ix,]
y <- y[,ix]
# to the transformation proposed by Smithson and Verkuilen (2006)
cell_proportions = DR_data( t(y ))
x$site_batch <- paste(x$site,x$batch,sep='_')
x$davies_based_risk <- factor(x$davies_based_risk,levels=c('standard_risk','high_risk'))
dr_fit_common <- DirichReg( cell_proportions ~ davies_based_risk + site_batch, x, model = "common" )
u = summary(dr_fit_common)
pvals = u$coef.mat[grep('davies_based_riskhigh_risk', rownames(u$coef.mat), invert=FALSE), 4]
v = names(pvals)
prob.ratio = exp( summary(dr_fit_common)$coefficients[paste0(rownames(y),":davies_based_riskhigh_risk")] )
pvals = matrix(pvals, ncol=length(u$varnames))
rownames(pvals) = gsub('davies_based_riskhigh_risk', '', v[1:nrow(pvals)])
colnames(pvals) = u$varnames
dirichlet_res <- data.frame(log2fc = log2(exp( summary(dr_fit_common)$coefficients[paste0(rownames(y),":davies_based_riskhigh_risk")] )) ,
pval=colMeans(pvals))
rownames(dirichlet_res) <- gsub(':davies_based_riskhigh_risk','',rownames(dirichlet_res))
dirichlet_res$Marker <- rownames(dirichlet_res)
risk_res_tmp <- dirichlet_res[which(dirichlet_res$Marker %in% risk_res$Marker),]
risk_res_tmp <- risk_res_tmp[match(risk_res$Marker,risk_res_tmp$Marker),]
identical(risk_res_tmp$Marker,risk_res$Marker)
## [1] TRUE
a <-risk_res_tmp ; b <- risk_res
cor.test(a$log2fc,b$logFC,method = 'pearson')
##
## Pearson's product-moment correlation
##
## data: a$log2fc and b$logFC
## t = 7.8663, df = 104, p-value = 3.601e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4753991 0.7178839
## sample estimates:
## cor
## 0.6107671
cor.test(a$pval,b$P.Value,method = 'pearson')
##
## Pearson's product-moment correlation
##
## data: a$pval and b$P.Value
## t = -0.23296, df = 104, p-value = 0.8162
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2126669 0.1686522
## sample estimates:
## cor
## -0.02283798
ggplot()+aes(x=a$log2fc,y=b$logFC*100)+geom_point() + theme_classic() +
geom_hline(yintercept = 0, linetype=2,color='deeppink') + geom_vline(xintercept = 0, linetype=2,color='deeppink') +
geom_smooth(color='skyblue1',method = 'glm')+labs(x='Dirichlet',y='Dream')
## `geom_smooth()` using formula = 'y ~ x'
y <- table(file_717_per_cell_md$subcluster_V03072023,file_717_per_cell_md$sample_id)
class(y) <- 'matrix'
#y <- log2(y+1)
x <- file_717_per_cell_md[,which(colnames(file_717_per_cell_md) %in% c('davies_based_risk','sample_id','visit_type','siteXbatch','progression_group'))]
rownames(x) <- NULL
x <- unique(x)
x$site <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$a
x$batch <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$b
rownames(x) <- x$sample_id
x$sample_id <- NULL ; x$siteXbatch <- NULL
identical(colnames(y),rownames(x))
## [1] FALSE
x<-x[match(colnames(y),rownames(x)),]
identical(colnames(y),rownames(x))
## [1] TRUE
### subset
ix <- which(x$davies_based_risk %in% c('high_risk','standard_risk') & x$visit_type %in% c('baseline_diagnosis'))
x <- x[ix,]
y <- y[,ix]
y <- y[!rownames(y) %in% my_doublets,]
# to the transformation proposed by Smithson and Verkuilen (2006)
cell_proportions = DR_data( t(y ))
x$site_batch <- paste(x$site,x$batch,sep='_')
x$davies_based_risk <- factor(x$davies_based_risk,levels=c('standard_risk','high_risk'))
dr_fit_common <- DirichReg( cell_proportions ~ davies_based_risk + site_batch, x, model = "common" )
u = summary(dr_fit_common)
pvals = u$coef.mat[grep('davies_based_riskhigh_risk', rownames(u$coef.mat), invert=FALSE), 4]
v = names(pvals)
prob.ratio = exp( summary(dr_fit_common)$coefficients[paste0(rownames(y),":davies_based_riskhigh_risk")] )
pvals = matrix(pvals, ncol=length(u$varnames))
rownames(pvals) = gsub('davies_based_riskhigh_risk', '', v[1:nrow(pvals)])
colnames(pvals) = u$varnames
dirichlet_ND_res <- data.frame(log2fc = log2(exp( summary(dr_fit_common)$coefficients[paste0(rownames(y),":davies_based_riskhigh_risk")] )) ,
pval=colMeans(pvals))
rownames(dirichlet_ND_res) <- gsub(':davies_based_riskhigh_risk','',rownames(dirichlet_ND_res))
dirichlet_ND_res$Marker <- rownames(dirichlet_ND_res)
risk_res_tmp <- dirichlet_ND_res[which(dirichlet_ND_res$Marker %in% risk_res_ND$Marker),]
risk_res_tmp <- risk_res_tmp[match(risk_res_ND$Marker,risk_res_tmp$Marker),]
identical(risk_res_tmp$Marker,risk_res_ND$Marker)
## [1] TRUE
a <-risk_res_tmp ; b <- risk_res_ND
ggplot()+aes(x=a$log2fc,y=b$logFC*100)+geom_point() + theme_classic() +
geom_hline(yintercept = 0, linetype=2,color='deeppink') + geom_vline(xintercept = 0, linetype=2,color='deeppink') +
geom_smooth(color='skyblue1',method = 'glm')+labs(x='Dirichlet',y='Dream')
## `geom_smooth()` using formula = 'y ~ x'
###
my_cells <- rownames(y)
###
subcluster_V2_df <- melt(table(file_717_per_cell_md$subcluster_V03072023,
file_717_per_cell_md$sample_id,
file_717_per_cell_md$doublet_pred_edit,
file_717_per_cell_md$visit_type,
file_717_per_cell_md$davies_based_risk,
file_717_per_cell_md$siteXbatch))
colnames(subcluster_V2_df) <- c('Subcluster','sample_id','doublet','visit','risk','batch_site','cells')
pdf(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/figures/review_diag_boxplots_cell_cluster_per_batch_site_baseline.pdf',width = 3,height = 7)
for(i in 1:length(my_cells)){
x <- subcluster_V2_df[subcluster_V2_df$Subcluster %in% my_cells[i] &
subcluster_V2_df$risk %in% c('high_risk','standard_risk') &
subcluster_V2_df$visit %in% c('baseline_diagnosis'),]
x$log10cells <- log10(x$cells+1)
x <- x[x$log10cells>0,]
print(
ggplot(data=x)+aes(x=risk,y=log10cells)+geom_boxplot(outlier.size = NA,outlier.shape =NA)+
theme_classic()+geom_quasirandom()+
labs(title = paste(my_cells[i]))+facet_wrap(~batch_site,ncol=3)+
rotate_x_text(angle = 90)
)
}
dev.off()
## quartz_off_screen
## 2
for(i in 1:length(my_cells)){
x <- subcluster_V2_df[subcluster_V2_df$Subcluster %in% my_cells[i] &
subcluster_V2_df$risk %in% c('high_risk','standard_risk') &
subcluster_V2_df$visit %in% c('baseline_diagnosis'),]
x$log10cells <- log10(x$cells+1)
x <- x[x$log10cells>0,]
print(
ggplot(data=x)+aes(x=risk,y=log10cells)+geom_boxplot(outlier.size = NA,outlier.shape =NA)+
theme_classic()+geom_quasirandom()+
labs(title = paste(my_cells[i]))+facet_wrap(~batch_site,ncol=3)+
rotate_x_text(angle = 90)
)
}
Duplicated code (identical to Yered’s). Used for comparison purposes.
y <- table(file_717_per_cell_md$subcluster_V03072023,file_717_per_cell_md$sample_id)
class(y) <- 'matrix'
#y <- log2(y+1)
x <- file_717_per_cell_md[,which(colnames(file_717_per_cell_md) %in% c('davies_based_risk','sample_id','visit_type','siteXbatch','progression_group'))]
rownames(x) <- NULL
x <- unique(x)
x$site <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$a
x$batch <- tidyr::separate(data.frame(x$siteXbatch),1, sep="_", c("a","b",'c','d'))$b
rownames(x) <- x$sample_id
x$sample_id <- NULL ; x$siteXbatch <- NULL
identical(colnames(y),rownames(x))
## [1] FALSE
x<-x[match(colnames(y),rownames(x)),]
identical(colnames(y),rownames(x))
## [1] TRUE
### subset
ix <- which(x$davies_based_risk %in% c('high_risk','standard_risk') & x$visit_type %in% c('baseline_diagnosis'))
x <- x[ix,]
y <- y[,ix]
y <- y[!rownames(y) %in% my_doublets,]
my_res_list<-list()
Compartment <- c('NkT','BEry','Ery','Myeloid','Plasma') #,'Full'
res <- list()
for(iii in Compartment){
ix <- grep(paste('^',iii,sep=''), rownames(y))
z <- y[ix,]
# to the transformation proposed by Smithson and Verkuilen (2006)
cell_proportions = DR_data(t(z))
x$site_batch <- paste(x$site,x$batch,sep='_')
x$davies_based_risk <- factor(x$davies_based_risk,levels=c('standard_risk','high_risk'))
dr_fit_common <- DirichReg( cell_proportions ~ davies_based_risk + site_batch, x, model = "common" )
u = summary(dr_fit_common)
pvals = u$coef.mat[grep('davies_based_riskhigh_risk', rownames(u$coef.mat), invert=FALSE), 4]
v = names(pvals)
prob.ratio = exp( summary(dr_fit_common)$coefficients[paste0(rownames(z),":davies_based_riskhigh_risk")] )
pvals = matrix(pvals, ncol=length(u$varnames))
rownames(pvals) = gsub('davies_based_riskhigh_risk', '', v[1:nrow(pvals)])
colnames(pvals) = u$varnames
dirichlet_ND_comp_res <- data.frame(log2fc = log2(exp( summary(dr_fit_common)$coefficients[paste0(rownames(z),":davies_based_riskhigh_risk")] )) ,
pval=colMeans(pvals),
Marker=rownames(z))
rownames(dirichlet_ND_comp_res) <- gsub(':davies_based_riskhigh_risk','',rownames(dirichlet_ND_comp_res))
my_res_list[[iii]] <- dirichlet_ND_comp_res
}
dirichlet_ND_comp_res <- do.call(rbind,my_res_list)
dirichlet_ND_res$Marker <- rownames(dirichlet_ND_res)
risk_res_tmp <- dirichlet_ND_res[which(dirichlet_ND_res$Marker %in% dirichlet_ND_comp_res$Marker),]
risk_res_tmp <- risk_res_tmp[match(dirichlet_ND_comp_res$Marker,risk_res_tmp$Marker),]
identical(risk_res_tmp$Marker,dirichlet_ND_comp_res$Marker)
## [1] TRUE
a <-risk_res_tmp ; b <- dirichlet_ND_comp_res
cor.test(a$log2fc,b$log2fc,method='pearson')
##
## Pearson's product-moment correlation
##
## data: a$log2fc and b$log2fc
## t = 16.866, df = 86, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8167328 0.9173593
## sample estimates:
## cor
## 0.8762801
cor.test(a$pval,b$pval,method='pearson')
##
## Pearson's product-moment correlation
##
## data: a$pval and b$pval
## t = 12.944, df = 86, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7272917 0.8736110
## sample estimates:
## cor
## 0.8129073
Similarity between logfc and pvalue falls into 82% and 81% agreement respective.
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_diag_risk_res_ND_DAA_dream.RDS',risk_res_ND)
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_diag_prog_res_ND_DAA_dream.RDS',progression_res_ND)
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_diag_risk_res_DAA_dream.RDS',risk_res)
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_diag_prog_res_DAA_dream.RDS',progression_res)
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_diag_res_compartment_nd_DAA_dream.RDS',res_compartment_nd)
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_diag_dirichlet_res_DAA_dream.RDS',dirichlet_res)
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_diag_dirichlet_ND_res_DAA_dream.RDS',dirichlet_ND_res)
saveRDS(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_diag_dirichlet_ND_comp_res_DAA_dream.RDS',dirichlet_ND_comp_res)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_diag_risk_res_ND_DAA_dream.csv',risk_res_ND)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_diag_prog_res_ND_DAA_dream.csv',progression_res_ND)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_diag_risk_res_DAA_dream.csv',risk_res)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_diag_prog_res_DAA_dream.csv',progression_res)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_diag_res_compartment_nd_DAA_dream.csv',res_compartment_nd)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_diag_prog_res_DAA_dream.csv',progression_res)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_diag_dirichlet_res_DAA_dream.csv',dirichlet_res)
write.csv(file='/Users/gonzae34/Documents/projects_gnjatic/MMRF_projects/immune_atlas/RData/results_diag_dirichlet_ND_comp_res_DAA_dream.csv',dirichlet_ND_comp_res)